'''
Web crawling -- parsing web pages -- efficient crawling -- advanced crawling libraries
HTML, CSS, JavaScript
'''
from urllib.request import urlopen
import re
# Download the page once. The context manager closes the HTTP response even
# if reading or decoding raises.
with urlopen("http://www.weather.com.cn/weather1d/101280101.shtml#dingzhi_first") as response:
    html = response.read().decode('utf-8')

# Regular-expression matching: text between <title> and </title> (lazy, so
# each match stops at the first closing tag).
res = re.findall(r"<title>(.+?)</title>", html)
print(res)

# Contents of bare <p>...</p> elements. re.DOTALL lets "." match newlines so
# paragraphs spanning several lines are captured; re.M would only change how
# ^ and $ behave, and this pattern uses neither.
res2 = re.findall(r"<p>(.*?)</p>", html, flags=re.DOTALL)
print(res2)

# Values of double-quoted href attributes. The closing quote is required in
# the pattern: a trailing lazy group with nothing after it always matches the
# empty string.
re3 = re.findall(r'href="(.*?)"', html)
print(re3)